1. Getting Data
In [ ]:
import yfinance as yf
import pandas as pd
import numpy as np
import time
symbols = { 'Crude_Oil':"CL=F", 'Brent_Oil':"BZ=F", 'Natural_Gas':"NG=F", 'RBOB_Gasoline':"RB=F", 'Heating_Oil':"HO=F", 'Energy_SPDR_ETF':"XLE", 'VanEck_Oil_ETF':"OIH", 'SPDR_S&P_Exploration_ETF':"XOP", 'ExxonMobil':"XOM", 'Chevron':"CVX", 'BPplc':"BP", 'Shellplc':"SHEL", 'TotalEnergies':"TTE", 'Gold':"GC=F", 'US_13week_Treasury':"^IRX", 'US_10year_Treasury':"^TNX", 'Volatility_Index':"^VIX" }
start_date = "2023-01-01"
end_date = "2023-12-31"
def fetch_and_save_data(symbols, start_date, end_date):
  for name, symbol in symbols.items():
    print(f"Fetching data for {name} ({symbol})...")
        # Fetch historical data
    data = yf.download(symbol, start=start_date, end=end_date)

    # Check if data is returned
    if data.empty:
        print(f"No data found for {name} ({symbol}) in 2023.")
        continue

    # Remove missing data
    data.dropna(inplace=True)

    # Calculate Logarithmic Returns
    data['Log Return'] = np.log(data['Close'] / data['Close'].shift(1))

    # Calculate Simple Returns
    data['Simple Return'] = data['Close'].pct_change()

    # Calculate Rolling Volatility (Annualized)
    data['Volatility'] = data['Log Return'].rolling(window=21).std() * np.sqrt(252)

    # Save data to CSV
    filename = f"{name}_2023.csv"
    data.to_csv(filename)
    print(f"Data for {name} saved to {filename}")
    # Print summary statistics
    print(data[['Log Return', 'Simple Return', 'Volatility']].describe())

    # Sleep to respect API rate limits
    time.sleep(3)
In [ ]:
if __name__ == "__main__":
  print("Starting data fetch...")
  fetch_and_save_data(symbols, start_date, end_date)
  print(f"Data fetch completed.")
  import requests
  import pandas as pd
  from datetime import datetime
Starting data fetch...
Fetching data for Crude_Oil (CL=F)...
YF.download() has changed argument auto_adjust default to True
[*********************100%***********************]  1 of 1 completed
Data for Crude_Oil saved to Crude_Oil_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean     -0.000286     -0.000060    0.332574
std       0.021288      0.021232    0.071661
min      -0.057785     -0.056147    0.155874
25%      -0.014003     -0.013906    0.288260
50%       0.002026      0.002028    0.345937
75%       0.013395      0.013485    0.388037
max       0.060881      0.062773    0.459743
[*********************100%***********************]  1 of 1 completed
Fetching data for Brent_Oil (BZ=F)...

Data for Brent_Oil saved to Brent_Oil_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   250.000000    250.000000  230.000000
mean     -0.000254     -0.000056    0.309385
std       0.019963      0.019898    0.076432
min      -0.057844     -0.056203    0.131897
25%      -0.011749     -0.011680    0.260908
50%       0.001553      0.001554    0.332548
75%       0.013642      0.013736    0.363194
max       0.062680      0.064686    0.447963
Fetching data for Natural_Gas (NG=F)...
[*********************100%***********************]  1 of 1 completed
Data for Natural_Gas saved to Natural_Gas_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   250.000000    250.000000  230.000000
mean     -0.001846     -0.000907    0.666541
std       0.043472      0.043258    0.171532
min      -0.156924     -0.145231    0.425533
25%      -0.031833     -0.031332    0.535535
50%      -0.000899     -0.000898    0.604309
75%       0.028986      0.029411    0.772925
max       0.108172      0.114239    1.057179
Fetching data for RBOB_Gasoline (RB=F)...
[*********************100%***********************]  1 of 1 completed
Data for RBOB_Gasoline saved to RBOB_Gasoline_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   250.000000    250.000000  230.000000
mean     -0.000464     -0.000211    0.356803
std       0.022547      0.022495    0.054429
min      -0.071156     -0.068684    0.231217
25%      -0.014693     -0.014585    0.323705
50%       0.002527      0.002530    0.357403
75%       0.015393      0.015512    0.388705
max       0.094215      0.098796    0.488964
[*********************100%***********************]  1 of 1 completed
Fetching data for Heating_Oil (HO=F)...
Data for Heating_Oil saved to Heating_Oil_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   250.000000    250.000000  230.000000
mean     -0.000759     -0.000509    0.353132
std       0.022432      0.022357    0.065057
min      -0.075426     -0.072652    0.194995
25%      -0.015041     -0.014928    0.312962
50%       0.000238      0.000238    0.347414
75%       0.014254      0.014356    0.387611
max       0.053332      0.054780    0.510412

Fetching data for Energy_SPDR_ETF (XLE)...
[*********************100%***********************]  1 of 1 completed
Data for Energy_SPDR_ETF saved to Energy_SPDR_ETF_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000118      0.000220    0.225758
std       0.014287      0.014284    0.049664
min      -0.055196     -0.053700    0.130263
25%      -0.009359     -0.009315    0.189967
50%       0.000458      0.000458    0.223895
75%       0.009067      0.009108    0.254982
max       0.044278      0.045273    0.358635
Fetching data for VanEck_Oil_ETF (OIH)...
[*********************100%***********************]  1 of 1 completed
Data for VanEck_Oil_ETF saved to VanEck_Oil_ETF_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000322      0.000523    0.310716
std       0.020092      0.020058    0.085889
min      -0.076020     -0.073203    0.150593
25%      -0.009345     -0.009301    0.251619
50%       0.000276      0.000276    0.314504
75%       0.013242      0.013330    0.362974
max       0.066095      0.068329    0.536218
[*********************100%***********************]  1 of 1 completed
Fetching data for SPDR_S&P_Exploration_ETF (XOP)...
Data for SPDR_S&P_Exploration_ETF saved to SPDR_S&P_Exploration_ETF_2023.csv

Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000372      0.000530    0.281353
std       0.017823      0.017817    0.065161
min      -0.065403     -0.063311    0.161612
25%      -0.012211     -0.012137    0.236566
50%       0.001138      0.001138    0.278833
75%       0.012228      0.012303    0.314357
max       0.050920      0.052238    0.431697
Fetching data for ExxonMobil (XOM)...
[*********************100%***********************]  1 of 1 completed
Data for ExxonMobil saved to ExxonMobil_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean     -0.000119      0.000003    0.248197
std       0.015660      0.015677    0.050133
min      -0.051027     -0.049747    0.157601
25%      -0.011008     -0.010947    0.206351
50%       0.000090      0.000090    0.244240
75%       0.009022      0.009063    0.284913
max       0.057326      0.059000    0.377416
Fetching data for Chevron (CVX)...
[*********************100%***********************]  1 of 1 completed
Data for Chevron saved to Chevron_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean     -0.000464     -0.000359    0.226455
std       0.014534      0.014484    0.055295
min      -0.069570     -0.067205    0.133583
25%      -0.008345     -0.008310    0.188684
50%      -0.000120     -0.000120    0.213583
75%       0.007778      0.007809    0.256822
max       0.047492      0.048637    0.367280
Fetching data for BPplc (BP)...
[*********************100%***********************]  1 of 1 completed
Data for BPplc saved to BPplc_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000305      0.000429    0.248770
std       0.015833      0.015799    0.067488
min      -0.084175     -0.080730    0.140689
25%      -0.008465     -0.008429    0.190797
50%       0.000777      0.000777    0.243896
75%       0.008956      0.008996    0.313371
max       0.080219      0.083525    0.376885
Fetching data for Shellplc (SHEL)...
[*********************100%***********************]  1 of 1 completed
Data for Shellplc saved to Shellplc_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000808      0.000902    0.216359
std       0.013770      0.013763    0.058839
min      -0.067011     -0.064815    0.100732
25%      -0.006992     -0.006968    0.180034
50%       0.001603      0.001605    0.215089
75%       0.009200      0.009242    0.233886
max       0.050328      0.051616    0.392383
Fetching data for TotalEnergies (TTE)...
[*********************100%***********************]  1 of 1 completed
Data for TotalEnergies saved to TotalEnergies_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000574      0.000677    0.228171
std       0.014331      0.014342    0.053382
min      -0.044171     -0.043210    0.114297
25%      -0.009592     -0.009546    0.190619
50%       0.001971      0.001973    0.231794
75%       0.010083      0.010134    0.255760
max       0.066970      0.069263    0.391471
[*********************100%***********************]  1 of 1 completed
Fetching data for Gold (GC=F)...
Data for Gold saved to Gold_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000459      0.000494    0.127636
std       0.008321      0.008331    0.035496
min      -0.028262     -0.027866    0.074316
25%      -0.004029     -0.004021    0.099957
50%       0.000108      0.000108    0.121441
75%       0.004800      0.004812    0.152001
max       0.030608      0.031081    0.219037
[*********************100%***********************]  1 of 1 completed
Fetching data for US_13week_Treasury (^IRX)...
Data for US_13week_Treasury saved to US_13week_Treasury_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000790      0.000830    0.105136
std       0.008907      0.008920    0.093383
min      -0.053736     -0.052318    0.021988
25%      -0.001909     -0.001907    0.037481
50%       0.000381      0.000381    0.060702
75%       0.002853      0.002857    0.144180
max       0.039283      0.040065    0.356717
[*********************100%***********************]  1 of 1 completed
Fetching data for US_10year_Treasury (^TNX)...
Data for US_10year_Treasury saved to US_10year_Treasury_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean      0.000077      0.000258    0.294563
std       0.019115      0.019065    0.067933
min      -0.060386     -0.058599    0.179998
25%      -0.012288     -0.012213    0.256861
50%       0.000000      0.000000    0.280897
75%       0.012911      0.012994    0.312546
max       0.042855      0.043787    0.505455
[*********************100%***********************]  1 of 1 completed
Fetching data for Volatility_Index (^VIX)...
Data for Volatility_Index saved to Volatility_Index_2023.csv
Price   Log Return Simple Return  Volatility
Ticker                                      
count   249.000000    249.000000  229.000000
mean     -0.002447     -0.000950    0.866112
std       0.054650      0.055161    0.212825
min      -0.155894     -0.144350    0.404276
25%      -0.039640     -0.038865    0.714051
50%      -0.004484     -0.004474    0.881074
75%       0.024762      0.025071    1.011744
max       0.168181      0.183150    1.328913

Data fetch completed.
In [ ]:
import pandas as pd

# Function to preprocess the data
def preprocess_data(filename):
    # Read CSV (Skip first two rows if necessary)
    df = pd.read_csv(filename, skiprows=2)

    # Convert 'Date' column to datetime format
    df['Date'] = pd.to_datetime(df['Date'])

    # Set 'Date' as Index
    df.set_index('Date', inplace=True)

    # Rename columns to appropriate names
    df.columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Log Return', 'Simple Return', 'Volatility']

    # Clean and convert columns to numeric types
    for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
        # Convert to string first, then replace commas and convert to numeric
        df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', ''), errors='coerce')

    # Check for any NaN values after conversion
    if df.isnull().values.any():
        print(f"Warning: There are NaN values in the DataFrame after conversion in {filename}.")

    return df

# List of files to preprocess
files = [
    "Crude_Oil_2023.csv",
    "Brent_Oil_2023.csv",
    "Natural_Gas_2023.csv",
    "RBOB_Gasoline_2023.csv",
    "Heating_Oil_2023.csv",
    "Energy_SPDR_ETF_2023.csv",
    "VanEck_Oil_ETF_2023.csv",
    "SPDR_S&P_Exploration_ETF_2023.csv",
    "ExxonMobil_2023.csv",
    "Chevron_2023.csv",
    "BPplc_2023.csv",
    "Shellplc_2023.csv",
    "TotalEnergies_2023.csv",
    "Gold_2023.csv",
    "US_13week_Treasury_2023.csv",
    "US_10year_Treasury_2023.csv",
    "Volatility_Index_2023.csv"
]

# Preprocess each file
dataframes = {}
for file in files:
    dataframes[file] = preprocess_data(file)
    print(f"Processed {file}:\n", dataframes[file].head(), "\n")
    print(dataframes[file].info(), "\n")
    print(dataframes[file].describe(), "\n")
    print(dataframes[file].dtypes)
Warning: There are NaN values in the DataFrame after conversion in Crude_Oil_2023.csv.
Processed Crude_Oil_2023.csv:
                  Open       High        Low      Close  Volume  Log Return  \
Date                                                                         
2023-01-03  76.930000  81.500000  76.599998  80.570000  338520         NaN   
2023-01-04  72.839996  77.419998  72.730003  77.250000  352434   -0.054631   
2023-01-05  73.669998  74.919998  72.459999  73.250000  300731    0.011330   
2023-01-06  73.769997  75.470001  73.239998  73.970001  258128    0.001356   
2023-01-09  74.629997  76.739998  73.470001  73.470001  329290    0.011590   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.053165         NaN  
2023-01-05       0.011395         NaN  
2023-01-06       0.001357         NaN  
2023-01-09       0.011658         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close         Volume  \
count  250.000000  250.000000  250.000000  250.000000     250.000000   
mean    77.597120   78.885520   76.370440   77.726560  313673.744000   
std      6.008919    5.989190    6.098372    6.032712   97171.270189   
min     66.739998   67.699997   63.639999   66.620003       0.000000   
25%     72.832500   74.127502   71.712500   73.032503  275205.500000   
50%     77.084999   78.074997   75.690002   77.155003  321811.500000   
75%     81.017498   82.344997   80.099998   81.275002  367279.500000   
max     93.680000   95.029999   91.389999   93.779999  559169.000000   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean    -0.000286      -0.000060    0.332574  
std      0.021288       0.021232    0.071661  
min     -0.057785      -0.056147    0.155874  
25%     -0.014003      -0.013906    0.288260  
50%      0.002026       0.002028    0.345937  
75%      0.013395       0.013485    0.388037  
max      0.060881       0.062773    0.459743   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Brent_Oil_2023.csv.
Processed Brent_Oil_2023.csv:
                  Open       High        Low      Close  Volume  Log Return  \
Date                                                                         
2023-01-03  82.099998  87.019997  81.769997  86.040001   27559         NaN   
2023-01-04  77.839996  82.669998  77.720001  82.230003   24772   -0.053283   
2023-01-05  78.690002  79.959999  77.610001  78.089996   28051    0.010861   
2023-01-06  78.570000  80.570000  78.050003  78.809998   23767   -0.001526   
2023-01-09  79.650002  81.370003  78.339996  78.480003   29985    0.013652   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.051888         NaN  
2023-01-05       0.010920         NaN  
2023-01-06      -0.001525         NaN  
2023-01-09       0.013746         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           251 non-null    float64
 1   High           251 non-null    float64
 2   Low            251 non-null    float64
 3   Close          251 non-null    float64
 4   Volume         251 non-null    int64  
 5   Log Return     250 non-null    float64
 6   Simple Return  250 non-null    float64
 7   Volatility     230 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  251.000000  251.000000  251.000000  251.000000    251.000000   
mean    82.190438   83.381753   80.988407   82.243865  26777.577689   
std      5.577078    5.498371    5.664448    5.564913   9075.619608   
min     71.839996   73.730003   68.199997   71.889999      0.000000   
25%     77.445000   78.715000   76.450001   77.579998  21021.000000   
50%     82.470001   83.470001   81.070000   82.580002  26913.000000   
75%     85.814999   86.790001   84.555000   85.844997  32191.000000   
max     96.550003   97.629997   94.959999   96.620003  59320.000000   

       Log Return  Simple Return  Volatility  
count  250.000000     250.000000  230.000000  
mean    -0.000254      -0.000056    0.309385  
std      0.019963       0.019898    0.076432  
min     -0.057844      -0.056203    0.131897  
25%     -0.011749      -0.011680    0.260908  
50%      0.001553       0.001554    0.332548  
75%      0.013642       0.013736    0.363194  
max      0.062680       0.064686    0.447963   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Natural_Gas_2023.csv.
Processed Natural_Gas_2023.csv:
              Open   High    Low  Close  Volume  Log Return  Simple Return  \
Date                                                                        
2023-01-03  3.988  4.394  3.894  4.393  116837         NaN            NaN   
2023-01-04  4.172  4.219  3.900  4.008   99759    0.045106       0.046138   
2023-01-05  3.720  4.175  3.651  4.155  116682   -0.114672      -0.108341   
2023-01-06  3.710  3.839  3.520  3.764  105050   -0.002692      -0.002688   
2023-01-09  3.910  4.128  3.781  3.810  130276    0.052506       0.053908   

            Volatility  
Date                    
2023-01-03         NaN  
2023-01-04         NaN  
2023-01-05         NaN  
2023-01-06         NaN  
2023-01-09         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           251 non-null    float64
 1   High           251 non-null    float64
 2   Low            251 non-null    float64
 3   Close          251 non-null    float64
 4   Volume         251 non-null    int64  
 5   Log Return     250 non-null    float64
 6   Simple Return  250 non-null    float64
 7   Volatility     230 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close         Volume  \
count  251.000000  251.000000  251.000000  251.000000     251.000000   
mean     2.665689    2.753845    2.591215    2.679028  137590.215139   
std      0.402081    0.425602    0.391230    0.417368   53342.669274   
min      1.991000    2.083000    1.944000    2.015000       0.000000   
25%      2.382500    2.465500    2.316000    2.394500  109707.000000   
50%      2.603000    2.674000    2.540000    2.620000  135522.000000   
75%      2.827000    2.896500    2.760500    2.815000  164728.500000   
max      4.172000    4.394000    3.900000    4.393000  330300.000000   

       Log Return  Simple Return  Volatility  
count  250.000000     250.000000  230.000000  
mean    -0.001846      -0.000907    0.666541  
std      0.043472       0.043258    0.171532  
min     -0.156924      -0.145231    0.425533  
25%     -0.031833      -0.031332    0.535535  
50%     -0.000899      -0.000898    0.604309  
75%      0.028986       0.029411    0.772925  
max      0.108172       0.114239    1.057179   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in RBOB_Gasoline_2023.csv.
Processed RBOB_Gasoline_2023.csv:
               Open    High     Low   Close  Volume  Log Return  Simple Return  \
Date                                                                            
2023-01-03  2.3612  2.5057  2.3484  2.4909   65711         NaN            NaN   
2023-01-04  2.2592  2.3754  2.2433  2.3501   40222   -0.044159      -0.043198   
2023-01-05  2.2671  2.3131  2.2554  2.2682   47548    0.003491       0.003497   
2023-01-06  2.2446  2.3187  2.2356  2.2700   45192   -0.009974      -0.009925   
2023-01-09  2.2929  2.3356  2.2510  2.2510   49984    0.021290       0.021518   

            Volatility  
Date                    
2023-01-03         NaN  
2023-01-04         NaN  
2023-01-05         NaN  
2023-01-06         NaN  
2023-01-09         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           251 non-null    float64
 1   High           251 non-null    float64
 2   Low            251 non-null    float64
 3   Close          251 non-null    float64
 4   Volume         251 non-null    int64  
 5   Log Return     250 non-null    float64
 6   Simple Return  250 non-null    float64
 7   Volatility     230 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close         Volume  \
count  251.000000  251.000000  251.000000  251.000000     251.000000   
mean     2.509494    2.548939    2.468406    2.511362   45500.243028   
std      0.232477    0.229328    0.227835    0.229636   16806.739748   
min      1.979700    2.041000    1.967200    1.987100       0.000000   
25%      2.321550    2.359550    2.265700    2.314500   32296.000000   
50%      2.545100    2.587100    2.502100    2.544000   46981.000000   
75%      2.676700    2.711250    2.631850    2.678400   56205.000000   
max      2.964900    2.993600    2.924000    2.949000  121248.000000   

       Log Return  Simple Return  Volatility  
count  250.000000     250.000000  230.000000  
mean    -0.000464      -0.000211    0.356803  
std      0.022547       0.022495    0.054429  
min     -0.071156      -0.068684    0.231217  
25%     -0.014693      -0.014585    0.323705  
50%      0.002527       0.002530    0.357403  
75%      0.015393       0.015512    0.388705  
max      0.094215       0.098796    0.488964   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Heating_Oil_2023.csv.
Processed Heating_Oil_2023.csv:
               Open    High     Low   Close  Volume  Log Return  Simple Return  \
Date                                                                            
2023-01-03  3.0865  3.3322  3.0755  3.2965   58567         NaN            NaN   
2023-01-04  2.9719  3.0912  2.9215  3.0682   51609   -0.037836      -0.037129   
2023-01-05  2.9723  3.0669  2.9200  3.0016   55068    0.000135       0.000135   
2023-01-06  3.0045  3.0536  2.9520  2.9789   41596    0.010775       0.010833   
2023-01-09  3.0360  3.1069  2.9949  3.0054   49943    0.010430       0.010484   

            Volatility  
Date                    
2023-01-03         NaN  
2023-01-04         NaN  
2023-01-05         NaN  
2023-01-06         NaN  
2023-01-09         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           251 non-null    float64
 1   High           251 non-null    float64
 2   Low            251 non-null    float64
 3   Close          251 non-null    float64
 4   Volume         251 non-null    int64  
 5   Log Return     250 non-null    float64
 6   Simple Return  250 non-null    float64
 7   Volatility     230 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  251.000000  251.000000  251.000000  251.000000    251.000000   
mean     2.813668    2.863859    2.765468    2.817678  41625.661355   
std      0.315287    0.323386    0.307672    0.316162  15232.702850   
min      2.232300    2.269500    2.150000    2.210200      0.000000   
25%      2.578600    2.621200    2.547800    2.580500  28271.500000   
50%      2.777600    2.836900    2.734000    2.784200  43513.000000   
75%      3.057050    3.108400    3.006750    3.069050  53725.000000   
max      3.550900    3.580000    3.461500    3.540600  75580.000000   

       Log Return  Simple Return  Volatility  
count  250.000000     250.000000  230.000000  
mean    -0.000759      -0.000509    0.353132  
std      0.022432       0.022357    0.065057  
min     -0.075426      -0.072652    0.194995  
25%     -0.015041      -0.014928    0.312962  
50%      0.000238       0.000238    0.347414  
75%      0.014254       0.014356    0.387611  
max      0.053332       0.054780    0.510412   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Energy_SPDR_ETF_2023.csv.
Processed Energy_SPDR_ETF_2023.csv:
                  Open       High        Low      Close    Volume  Log Return  \
Date                                                                           
2023-01-03  78.797256  81.560761  77.816956  81.149968  26541400         NaN   
2023-01-04  78.787910  79.245381  77.331471  77.630228  22852600   -0.000119   
2023-01-05  80.225685  80.664486  78.433143  78.657209  19361900    0.018084   
2023-01-06  81.747475  82.746444  81.056600  81.261997  22211200    0.018791   
2023-01-09  81.458054  83.110552  81.196642  82.979846  23001600   -0.003547   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.000119         NaN  
2023-01-05       0.018249         NaN  
2023-01-06       0.018969         NaN  
2023-01-09      -0.003540         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean    80.606490   81.366511   79.860143   80.628933  1.987130e+07   
std      3.967614    3.945646    4.014046    4.049407  5.685873e+06   
min     71.860481   72.950053   70.357359   70.637447  9.576300e+06   
25%     77.535322   78.358228   77.143755   77.808466  1.636542e+07   
50%     81.172474   81.807018   80.463929   81.071801  1.897520e+07   
75%     83.351974   84.160872   82.582509   83.467739  2.283078e+07   
max     88.847862   89.161915   88.467193   88.966643  5.767350e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000118       0.000220    0.225758  
std      0.014287       0.014284    0.049664  
min     -0.055196      -0.053700    0.130263  
25%     -0.009359      -0.009315    0.189967  
50%      0.000458       0.000458    0.223895  
75%      0.009067       0.009108    0.254982  
max      0.044278       0.045273    0.358635   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in VanEck_Oil_ETF_2023.csv.
Processed VanEck_Oil_ETF_2023.csv:
                   Open        High         Low       Close   Volume  \
Date                                                                  
2023-01-03  279.780548  293.673451  274.969228  291.596285   762600   
2023-01-04  280.814331  283.104046  274.090099  275.056225   828600   
2023-01-05  286.108704  287.896014  279.683939  280.978545   670200   
2023-01-06  295.228912  299.180396  289.615720  290.833049  1247500   
2023-01-09  301.653687  307.788566  300.368722  302.523194  1131200   

            Log Return  Simple Return  Volatility  
Date                                               
2023-01-03         NaN            NaN         NaN  
2023-01-04    0.003688       0.003695         NaN  
2023-01-05    0.018678       0.018854         NaN  
2023-01-06    0.031379       0.031877         NaN  
2023-01-09    0.021529       0.021762         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean   298.916737  302.779731  295.069055  299.015556  6.074528e+05   
std     28.811957   28.778691   28.939455   28.961266  2.700343e+05   
min    238.420685  243.859981  237.705746  238.894095  2.080000e+05   
25%    273.099823  276.256640  268.674932  273.602193  4.089750e+05   
50%    305.380280  309.602004  302.159718  306.407031  5.297000e+05   
75%    320.797485  325.719885  318.034352  321.509976  7.366250e+05   
max    350.027618  351.747322  346.472289  350.684577  1.918700e+06   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000322       0.000523    0.310716  
std      0.020092       0.020058    0.085889  
min     -0.076020      -0.073203    0.150593  
25%     -0.009345      -0.009301    0.251619  
50%      0.000276       0.000276    0.314504  
75%      0.013242       0.013330    0.362974  
max      0.066095       0.068329    0.536218   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in SPDR_S&P_Exploration_ETF_2023.csv.
Processed SPDR_S&P_Exploration_ETF_2023.csv:
                   Open        High         Low       Close   Volume  \
Date                                                                  
2023-01-03  121.890556  128.636921  120.389252  127.335149  6321700   
2023-01-04  122.318138  123.629407  119.182511  119.695615  5151500   
2023-01-05  122.498688  123.553401  120.474782  121.757540  4576400   
2023-01-06  125.007187  126.375465  123.648420  124.294543  2849100   
2023-01-09  126.280418  129.226017  125.938349  128.057286  3898700   

            Log Return  Simple Return  Volatility  
Date                                               
2023-01-03         NaN            NaN         NaN  
2023-01-04    0.003502       0.003508         NaN  
2023-01-05    0.001475       0.001476         NaN  
2023-01-06    0.020271       0.020478         NaN  
2023-01-09    0.010134       0.010185         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean   130.352460  131.907668  128.786941  130.415726  4.582904e+06   
std      9.609913    9.531173    9.744432    9.694111  1.620531e+06   
min    111.200905  113.594735  108.473866  108.939458  2.005800e+06   
25%    122.275383  123.673703  120.901568  122.411931  3.465050e+06   
50%    130.912636  132.537463  128.727188  130.847782  4.352900e+06   
75%    138.167980  139.437562  136.592073  137.770904  5.272400e+06   
max    148.309601  149.444216  147.069642  148.656721  1.538520e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000372       0.000530    0.281353  
std      0.017823       0.017817    0.065161  
min     -0.065403      -0.063311    0.161612  
25%     -0.012211      -0.012137    0.236566  
50%      0.001138       0.001138    0.278833  
75%      0.012228       0.012303    0.314357  
max      0.050920       0.052238    0.431697   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in ExxonMobil_2023.csv.
Processed ExxonMobil_2023.csv:
                   Open        High         Low       Close    Volume  \
Date                                                                   
2023-01-03   98.713257  101.966313   97.767919  101.743883  15146200   
2023-01-04   99.000557   99.315672   97.091355   97.128428  18058400   
2023-01-05  101.215607  101.901435   98.527891   98.555694  15946600   
2023-01-06  102.438980  103.792104  101.641933  102.021924  16348100   
2023-01-09  100.529785  103.467733   99.918095  103.467733  17964600   

            Log Return  Simple Return  Volatility  
Date                                               
2023-01-03         NaN            NaN         NaN  
2023-01-04    0.002906       0.002910         NaN  
2023-01-05    0.022127       0.022374         NaN  
2023-01-06    0.012014       0.012087         NaN  
2023-01-09   -0.018813      -0.018637         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean   102.334976  103.402265  101.293446  102.350723  1.747948e+07   
std      4.504475    4.514376    4.452520    4.511013  6.544464e+06   
min     93.243523   94.578936   91.543774   92.561761  7.397000e+06   
25%     98.984196   99.949766   97.890439   98.977115  1.347562e+07   
50%    101.492382  102.777576  100.543262  101.813100  1.583425e+07   
75%    105.413452  106.388596  104.385516  105.347774  1.944365e+07   
max    114.160667  114.635542  112.251660  113.951724  5.793900e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean    -0.000119       0.000003    0.248197  
std      0.015660       0.015677    0.050133  
min     -0.051027      -0.049747    0.157601  
25%     -0.011008      -0.010947    0.206351  
50%      0.000090       0.000090    0.244240  
75%      0.009022       0.009063    0.284913  
max      0.057326       0.059000    0.377416   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Chevron_2023.csv.
Processed Chevron_2023.csv:
                   Open        High         Low       Close   Volume  \
Date                                                                  
2023-01-03  158.737152  163.307945  156.474552  162.432097  7565400   
2023-01-04  157.049301  159.010815  155.872395  155.963621  7684500   
2023-01-05  159.877563  160.744279  156.228224  156.465426  6088200   
2023-01-06  161.081818  163.663727  160.297209  161.501498  7191200   
2023-01-09  159.822830  162.696694  159.092973  162.632824  8385600   

            Log Return  Simple Return  Volatility  
Date                                               
2023-01-03         NaN            NaN         NaN  
2023-01-04   -0.010690      -0.010633         NaN  
2023-01-05    0.017849       0.018009         NaN  
2023-01-06    0.007504       0.007532         NaN  
2023-01-09   -0.007847      -0.007816         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean   148.810178  150.219206  147.493921  148.928152  8.646823e+06   
std      7.686490    7.739064    7.639849    7.794345  3.794895e+06   
min    133.188736  134.596238  132.991662  134.230300  3.272600e+06   
25%    143.320148  144.802549  141.737522  143.218724  6.400250e+06   
50%    148.376137  150.168435  147.480297  148.933785  7.474050e+06   
75%    155.577522  156.863780  154.584672  155.632771  1.000020e+07   
max    171.327362  171.345613  167.395201  168.772836  3.316180e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean    -0.000464      -0.000359    0.226455  
std      0.014534       0.014484    0.055295  
min     -0.069570      -0.067205    0.133583  
25%     -0.008345      -0.008310    0.188684  
50%     -0.000120      -0.000120    0.213583  
75%      0.007778       0.007809    0.256822  
max      0.047492       0.048637    0.367280   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in BPplc_2023.csv.
Processed BPplc_2023.csv:
                  Open       High        Low      Close   Volume  Log Return  \
Date                                                                          
2023-01-03  30.648390  31.425315  30.442996  31.255643  7967400         NaN   
2023-01-04  30.166155  30.416202  29.942900  30.184016  7492400   -0.015860   
2023-01-05  30.273323  30.353695  29.969697  29.987557  6440200    0.003546   
2023-01-06  30.773409  30.898431  30.380482  30.532294  6855400    0.016384   
2023-01-09  31.050249  31.478898  30.898434  31.380665  7831300    0.008956   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.015734         NaN  
2023-01-05       0.003553         NaN  
2023-01-06       0.016519         NaN  
2023-01-09       0.008996         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean    33.795749   34.063363   33.527429   33.802296  8.530306e+06   
std      1.661742    1.671672    1.671519    1.677034  3.690183e+06   
min     30.166155   30.353695   29.942900   29.987557  3.688000e+06   
25%     32.520833   32.796460   32.275912   32.563105  6.359875e+06   
50%     33.406439   33.699594   33.205290   33.447248  7.753350e+06   
75%     35.509333   35.866389   35.333720   35.610078  9.179825e+06   
max     37.641953   37.669621   37.309897   37.531264  3.522320e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000305       0.000429    0.248770  
std      0.015833       0.015799    0.067488  
min     -0.084175      -0.080730    0.140689  
25%     -0.008465      -0.008429    0.190797  
50%      0.000777       0.000777    0.243896  
75%      0.008956       0.008996    0.313371  
max      0.080219       0.083525    0.376885   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Shellplc_2023.csv.
Processed Shellplc_2023.csv:
                  Open       High        Low      Close   Volume  Log Return  \
Date                                                                          
2023-01-03  51.137989  52.795999  51.101448  52.499112  7766476         NaN   
2023-01-04  50.635567  51.128860  50.361516  50.982699  5712988   -0.009873   
2023-01-05  50.681244  50.763184  50.215358  50.233629  4266050    0.000902   
2023-01-06  52.298141  52.330114  51.192802  51.265880  5845518    0.031405   
2023-01-09  53.047215  53.532287  52.709219  53.129430  4939246    0.014222   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.009825         NaN  
2023-01-05       0.000902         NaN  
2023-01-06       0.031903         NaN  
2023-01-09       0.014323         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean    57.496620   57.897276   57.085029   57.464945  4.936137e+06   
std      3.440649    3.438978    3.526171    3.554715  1.720621e+06   
min     49.741489   50.437595   48.376945   46.826244  2.036330e+06   
25%     55.240446   55.711387   54.597055   55.153586  3.693633e+06   
50%     56.828331   57.209628   56.467426   56.762466  4.597592e+06   
75%     60.703148   61.156732   60.364325   60.714167  5.767324e+06   
max     64.488960   64.667699   64.056213   64.366665  1.277934e+07   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000808       0.000902    0.216359  
std      0.013770       0.013763    0.058839  
min     -0.067011      -0.064815    0.100732  
25%     -0.006992      -0.006968    0.180034  
50%      0.001603       0.001605    0.215089  
75%      0.009200       0.009242    0.233886  
max      0.050328       0.051616    0.392383   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in TotalEnergies_2023.csv.
Processed TotalEnergies_2023.csv:
                  Open       High        Low      Close   Volume  Log Return  \
Date                                                                          
2023-01-03  55.388927  56.894455  55.370897  56.678091  1876900         NaN   
2023-01-04  55.064388  55.821656  54.766887  55.488097  1507900   -0.005877   
2023-01-05  55.091434  55.262721  54.685753  54.730828  1097600    0.000491   
2023-01-06  55.740524  56.281430  55.388931  55.677415  2221500    0.011713   
2023-01-09  56.101124  56.867412  55.893776  56.696122  2200000    0.006448   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.005859         NaN  
2023-01-05       0.000491         NaN  
2023-01-06       0.011782         NaN  
2023-01-09       0.006469         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close        Volume  \
count  250.000000  250.000000  250.000000  250.000000  2.500000e+02   
mean    57.728019   58.167583   57.348465   57.793098  1.435535e+06   
std      3.630004    3.641039    3.679748    3.686205  5.987413e+05   
min     50.874840   51.294990   49.610235   50.079022  5.047000e+05   
25%     54.894386   55.448546   54.506603   54.970169  1.020625e+06   
50%     57.324015   57.588621   56.885443   57.163453  1.293200e+06   
75%     61.273447   61.753788   61.022727   61.507760  1.690550e+06   
max     64.820984   65.261491   64.633532   64.952200  4.315900e+06   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000574       0.000677    0.228171  
std      0.014331       0.014342    0.053382  
min     -0.044171      -0.043210    0.114297  
25%     -0.009592      -0.009546    0.190619  
50%      0.001971       0.001973    0.231794  
75%      0.010083       0.010134    0.255760  
max      0.066970       0.069263    0.391471   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Gold_2023.csv.
Processed Gold_2023.csv:
                    Open         High          Low        Close  Volume  \
Date                                                                     
2023-01-03  1839.699951  1839.699951  1836.199951  1836.199951      29   
2023-01-04  1852.800049  1859.099976  1845.599976  1845.599976      25   
2023-01-05  1834.800049  1855.199951  1834.800049  1855.199951      24   
2023-01-06  1864.199951  1868.199951  1835.300049  1838.400024      26   
2023-01-09  1872.699951  1880.000000  1867.000000  1867.000000      62   

            Log Return  Simple Return  Volatility  
Date                                               
2023-01-03         NaN            NaN         NaN  
2023-01-04    0.007096       0.007121         NaN  
2023-01-05   -0.009763      -0.009715         NaN  
2023-01-06    0.015896       0.016023         NaN  
2023-01-09    0.004549       0.004560         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

              Open         High          Low        Close         Volume  \
count   250.000000   250.000000   250.000000   250.000000     250.000000   
mean   1942.769197  1950.744001  1935.432801  1942.983197    3985.948000   
std      60.083909    61.702305    59.592685    60.484172   22478.477684   
min    1808.800049  1808.800049  1808.099976  1808.099976       0.000000   
25%    1911.849976  1917.375000  1903.224976  1910.325043      59.500000   
50%    1944.450012  1957.549988  1937.900024  1945.799988     202.000000   
75%    1983.175018  1990.250031  1977.624969  1984.499969     541.500000   
max    2081.899902  2130.199951  2066.500000  2081.600098  194253.000000   

       Log Return  Simple Return  Volatility  
count  249.000000     249.000000  229.000000  
mean     0.000459       0.000494    0.127636  
std      0.008321       0.008331    0.035496  
min     -0.028262      -0.027866    0.074316  
25%     -0.004029      -0.004021    0.099957  
50%      0.000108       0.000108    0.121441  
75%      0.004800       0.004812    0.152001  
max      0.030608       0.031081    0.219037   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in US_13week_Treasury_2023.csv.
Processed US_13week_Treasury_2023.csv:
              Open   High    Low  Close  Volume  Log Return  Simple Return  \
Date                                                                        
2023-01-03  4.255  4.260  4.225  4.260       0         NaN            NaN   
2023-01-04  4.400  4.400  4.378  4.390       0    0.033510       0.034078   
2023-01-05  4.498  4.500  4.400  4.408       0    0.022028       0.022273   
2023-01-06  4.493  4.520  4.455  4.510       0   -0.001112      -0.001112   
2023-01-09  4.483  4.503  4.458  4.490       0   -0.002228      -0.002226   

            Volatility  
Date                    
2023-01-03         NaN  
2023-01-04         NaN  
2023-01-05         NaN  
2023-01-06         NaN  
2023-01-09         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

            Open        High         Low      Close  Volume  Log Return  \
count  250.00000  250.000000  250.000000  250.00000   250.0  249.000000   
mean     5.04516    5.061180    5.010628    5.04550     0.0    0.000790   
std      0.29235    0.284998    0.304987    0.29456     0.0    0.008907   
min      4.25500    4.260000    4.225000    4.25800     0.0   -0.053736   
25%      4.81175    4.833750    4.715500    4.75425     0.0   -0.001909   
50%      5.20300    5.216500    5.142500    5.20500     0.0    0.000381   
75%      5.26800    5.277250    5.252250    5.27000     0.0    0.002853   
max      5.34800    5.348000    5.348000    5.34800     0.0    0.039283   

       Simple Return  Volatility  
count     249.000000  229.000000  
mean        0.000830    0.105136  
std         0.008920    0.093383  
min        -0.052318    0.021988  
25%        -0.001907    0.037481  
50%         0.000381    0.060702  
75%         0.002857    0.144180  
max         0.040065    0.356717   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in US_10year_Treasury_2023.csv.
Processed US_10year_Treasury_2023.csv:
              Open   High    Low  Close  Volume  Log Return  Simple Return  \
Date                                                                        
2023-01-03  3.793  3.810  3.724  3.758       0         NaN            NaN   
2023-01-04  3.709  3.728  3.664  3.677       0   -0.022395      -0.022146   
2023-01-05  3.720  3.784  3.698  3.728       0    0.002961       0.002966   
2023-01-06  3.569  3.754  3.558  3.746       0   -0.041438      -0.040591   
2023-01-09  3.517  3.591  3.508  3.589       0   -0.014677      -0.014570   

            Volatility  
Date                    
2023-01-03         NaN  
2023-01-04         NaN  
2023-01-05         NaN  
2023-01-06         NaN  
2023-01-09         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close  Volume  Log Return  \
count  250.000000  250.000000  250.000000  250.000000   250.0  249.000000   
mean     3.965036    4.004144    3.922412    3.962016     0.0    0.000077   
std      0.429600    0.430860    0.431096    0.430831     0.0    0.019115   
min      3.287000    3.305000    3.253000    3.268000     0.0   -0.060386   
25%      3.593750    3.639000    3.553000    3.586750     0.0   -0.012288   
50%      3.871500    3.906500    3.848000    3.873500     0.0    0.000000   
75%      4.263000    4.298750    4.233500    4.277000     0.0    0.012911   
max      4.988000    4.997000    4.894000    4.997000     0.0    0.042855   

       Simple Return  Volatility  
count     249.000000  229.000000  
mean        0.000258    0.294563  
std         0.019065    0.067933  
min        -0.058599    0.179998  
25%        -0.012213    0.256861  
50%         0.000000    0.280897  
75%         0.012994    0.312546  
max         0.043787    0.505455   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
Warning: There are NaN values in the DataFrame after conversion in Volatility_Index_2023.csv.
Processed Volatility_Index_2023.csv:
                  Open   High        Low      Close  Volume  Log Return  \
Date                                                                     
2023-01-03  22.900000  23.76  22.730000  23.090000       0         NaN   
2023-01-04  22.010000  23.27  21.940001  22.930000       0   -0.039640   
2023-01-05  22.459999  22.92  21.969999  22.200001       0    0.020239   
2023-01-06  21.129999  22.90  21.000000  22.690001       0   -0.061042   
2023-01-09  21.969999  21.98  21.270000  21.750000       0    0.038984   

            Simple Return  Volatility  
Date                                   
2023-01-03            NaN         NaN  
2023-01-04      -0.038865         NaN  
2023-01-05       0.020445         NaN  
2023-01-06      -0.059216         NaN  
2023-01-09       0.039754         NaN   

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 250 entries, 2023-01-03 to 2023-12-29
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Open           250 non-null    float64
 1   High           250 non-null    float64
 2   Low            250 non-null    float64
 3   Close          250 non-null    float64
 4   Volume         250 non-null    int64  
 5   Log Return     249 non-null    float64
 6   Simple Return  249 non-null    float64
 7   Volatility     229 non-null    float64
dtypes: float64(7), int64(1)
memory usage: 17.6 KB
None 

             Open        High         Low       Close  Volume  Log Return  \
count  250.000000  250.000000  250.000000  250.000000   250.0  249.000000   
mean    16.870040   17.830640   16.356960   17.119480     0.0   -0.002447   
std      3.139177    3.576509    2.885418    3.172261     0.0    0.054650   
min     12.070000   12.460000   11.810000   11.960000     0.0   -0.155894   
25%     13.932500   14.525000   13.732500   14.182500     0.0   -0.039640   
50%     16.935000   17.785001   16.345000   16.959999     0.0   -0.004484   
75%     19.080000   20.047500   18.549999   19.377500     0.0    0.024762   
max     26.520000   30.809999   24.000000   27.770000     0.0    0.168181   

       Simple Return  Volatility  
count     249.000000  229.000000  
mean       -0.000950    0.866112  
std         0.055161    0.212825  
min        -0.144350    0.404276  
25%        -0.038865    0.714051  
50%        -0.004474    0.881074  
75%         0.025071    1.011744  
max         0.183150    1.328913   

Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Log Return       float64
Simple Return    float64
Volatility       float64
dtype: object
In [ ]:
import matplotlib.pyplot as plt
for file in files:
  data = dataframes[file]
  plt.figure(figsize=(10, 6))
  plt.plot(data['Close'])  # Replace 'Close' with your column name
  plt.title('Closing Prices Over Time')
  plt.xlabel('DatetimeIndex')
  plt.ylabel('Close Price')
  plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Load the dataset
for file in files:
  data = dataframes[file]
  data['Close'] = data['Close'].replace([np.inf, -np.inf], np.nan)
  data['Close'].ffill()  # Forward fill
  data['Close'].bfill()  # Backward fill
  result = adfuller(data['Close'])
  print('ADF Statistic:', result[0])
  print('p-value:', result[1])
  if result[1]>0.05 :
    data['Close_diff'] = data['Close'].diff()
    data['Close_diff'].ffill()  # Fill NaNs after differencing
    # Check stationarity after differencing
    result_diff = adfuller(data['Close_diff'].dropna())
    print('ADF Statistic after Differencing:', result_diff[0])
    print('p-value after Differencing:', result_diff[1])
    # Determine ARIMA parameters (p, d, q)
    plot_acf(data['Close_diff'].dropna(), lags=40)
    plot_pacf(data['Close_diff'].dropna(), lags=40)
    plt.show()
    # Fit the ARIMA model (replace p, d, q with your values based on ACF/PACF)
    p, d, q = 1, 1, 2  # Example values, adjust based on your analysis
    model = ARIMA(data['Close'], order=(p, d, q))
    model_fit = model.fit()
    # Summary of the model
    print(model_fit.summary())
    # Forecasting
    forecast = model_fit.forecast(steps=30)  # Forecast for the next 30 periods
    plt.figure(figsize=(10, 6))
    plt.plot(data['Close'], label='Historical Data')
    plt.plot(forecast, label='Forecast', color='orange')
    plt.title('ARIMA Forecast of Closing Prices')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.legend()
    plt.show()
ADF Statistic: -2.2037246442165834
p-value: 0.20489640433604267
ADF Statistic after Differencing: -7.511608371394751
p-value after Differencing: 4.001877169606792e-11
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -479.144
Date:                Fri, 07 Mar 2025   AIC                            966.288
Time:                        18:23:35   BIC                            980.358
Sample:                             0   HQIC                           971.951
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.4263      0.627      0.680      0.496      -0.802       1.655
ma.L1         -0.3443      0.624     -0.551      0.581      -1.568       0.879
ma.L2         -0.1209      0.069     -1.763      0.078      -0.255       0.014
sigma2         2.7471      0.236     11.631      0.000       2.284       3.210
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):                 2.03
Prob(Q):                              0.92   Prob(JB):                         0.36
Heteroskedasticity (H):               1.00   Skew:                            -0.18
Prob(H) (two-sided):                  0.99   Kurtosis:                         3.25
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.356414871643807
p-value: 0.15441759946820255
ADF Statistic after Differencing: -7.519275116258346
p-value after Differencing: 3.82900614703005e-11
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  251
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -468.292
Date:                Fri, 07 Mar 2025   AIC                            944.584
Time:                        18:23:36   BIC                            958.670
Sample:                             0   HQIC                           950.254
                                - 251                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8578      0.134      6.407      0.000       0.595       1.120
ma.L1         -0.7427      0.139     -5.343      0.000      -1.015      -0.470
ma.L2         -0.1602      0.063     -2.547      0.011      -0.284      -0.037
sigma2         2.4796      0.223     11.096      0.000       2.042       2.918
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 5.94
Prob(Q):                              0.96   Prob(JB):                         0.05
Heteroskedasticity (H):               1.03   Skew:                            -0.38
Prob(H) (two-sided):                  0.89   Kurtosis:                         3.01
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
No description has been provided for this image
ADF Statistic: -4.230410970627573
p-value: 0.0005850054404811237
ADF Statistic: -1.5930813263111132
p-value: 0.48706805903440736
ADF Statistic after Differencing: -15.20607092960768
p-value after Differencing: 5.7403941610307795e-28
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  251
Model:                 ARIMA(1, 1, 2)   Log Likelihood                 357.730
Date:                Fri, 07 Mar 2025   AIC                           -707.461
Time:                        18:23:37   BIC                           -693.375
Sample:                             0   HQIC                          -701.792
                                - 251                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0956     30.903      0.003      0.998     -60.474      60.665
ma.L1         -0.0506     30.898     -0.002      0.999     -60.610      60.509
ma.L2         -0.0065      1.392     -0.005      0.996      -2.734       2.721
sigma2         0.0033      0.000     14.821      0.000       0.003       0.004
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                50.85
Prob(Q):                              0.98   Prob(JB):                         0.00
Heteroskedasticity (H):               0.54   Skew:                            -0.32
Prob(H) (two-sided):                  0.01   Kurtosis:                         5.12
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -1.8642061278731388
p-value: 0.3490824021195
ADF Statistic after Differencing: -15.388315496075856
p-value after Differencing: 3.3359286162502505e-28
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  251
Model:                 ARIMA(1, 1, 2)   Log Likelihood                 332.434
Date:                Fri, 07 Mar 2025   AIC                           -656.869
Time:                        18:23:38   BIC                           -642.783
Sample:                             0   HQIC                          -651.200
                                - 251                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0243     74.925      0.000      1.000    -146.827     146.875
ma.L1          0.0243     74.926      0.000      1.000    -146.827     146.876
ma.L2         -0.0003      3.647  -9.36e-05      1.000      -7.149       7.148
sigma2         0.0041      0.000     11.656      0.000       0.003       0.005
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 3.50
Prob(Q):                              0.96   Prob(JB):                         0.17
Heteroskedasticity (H):               0.86   Skew:                            -0.27
Prob(H) (two-sided):                  0.51   Kurtosis:                         3.21
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.407085800533096
p-value: 0.13974647341271612
ADF Statistic after Differencing: -15.885336836753535
p-value after Differencing: 8.663252375545324e-29
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -403.045
Date:                Fri, 07 Mar 2025   AIC                            814.090
Time:                        18:23:39   BIC                            828.159
Sample:                             0   HQIC                           819.753
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.3419      0.427     -0.801      0.423      -1.179       0.495
ma.L1          0.3529      0.426      0.829      0.407      -0.481       1.187
ma.L2         -0.0992      0.065     -1.518      0.129      -0.227       0.029
sigma2         1.4908      0.131     11.392      0.000       1.234       1.747
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 1.08
Prob(Q):                              0.99   Prob(JB):                         0.58
Heteroskedasticity (H):               0.50   Skew:                            -0.15
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.11
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -1.7385971808852456
p-value: 0.41134372256779017
ADF Statistic after Differencing: -11.947236185782739
p-value after Differencing: 4.401029357726941e-22
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -812.038
Date:                Fri, 07 Mar 2025   AIC                           1632.077
Time:                        18:23:40   BIC                           1646.147
Sample:                             0   HQIC                          1637.740
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1640      0.490     -0.335      0.738      -1.124       0.796
ma.L1          0.2253      0.490      0.459      0.646      -0.736       1.186
ma.L2         -0.0951      0.069     -1.378      0.168      -0.230       0.040
sigma2        39.8209      3.166     12.579      0.000      33.616      46.026
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 3.66
Prob(Q):                              0.96   Prob(JB):                         0.16
Heteroskedasticity (H):               0.46   Skew:                            -0.07
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.58
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.011030727205811
p-value: 0.28175755214705084
ADF Statistic after Differencing: -16.2162791677464
p-value after Differencing: 3.9394890209660076e-29
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -578.759
Date:                Fri, 07 Mar 2025   AIC                           1165.518
Time:                        18:23:41   BIC                           1179.588
Sample:                             0   HQIC                          1171.181
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2149      0.780     -0.275      0.783      -1.744       1.314
ma.L1          0.2014      0.782      0.258      0.797      -1.331       1.734
ma.L2         -0.0766      0.060     -1.273      0.203      -0.195       0.041
sigma2         6.1151      0.558     10.953      0.000       5.021       7.209
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 1.99
Prob(Q):                              0.99   Prob(JB):                         0.37
Heteroskedasticity (H):               0.61   Skew:                            -0.22
Prob(H) (two-sided):                  0.03   Kurtosis:                         2.96
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.851425951173653
p-value: 0.05129509801105939
ADF Statistic after Differencing: -12.478638254300106
p-value after Differencing: 3.1272223854677037e-23
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -482.359
Date:                Fri, 07 Mar 2025   AIC                            972.718
Time:                        18:23:41   BIC                            986.788
Sample:                             0   HQIC                           978.382
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.3358      0.371     -0.904      0.366      -1.064       0.392
ma.L1          0.3356      0.369      0.910      0.363      -0.387       1.058
ma.L2         -0.1303      0.061     -2.141      0.032      -0.250      -0.011
sigma2         2.8188      0.250     11.291      0.000       2.330       3.308
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 2.65
Prob(Q):                              0.99   Prob(JB):                         0.27
Heteroskedasticity (H):               0.57   Skew:                            -0.25
Prob(H) (two-sided):                  0.01   Kurtosis:                         3.05
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.485018534492955
p-value: 0.1191621590776008
ADF Statistic after Differencing: -15.90438866986888
p-value after Differencing: 8.25912546722968e-29
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -545.645
Date:                Fri, 07 Mar 2025   AIC                           1099.289
Time:                        18:23:42   BIC                           1113.359
Sample:                             0   HQIC                          1104.952
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2324      0.508     -0.457      0.647      -1.229       0.764
ma.L1          0.2410      0.503      0.479      0.632      -0.746       1.228
ma.L2         -0.1015      0.061     -1.676      0.094      -0.220       0.017
sigma2         4.6866      0.400     11.727      0.000       3.903       5.470
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 1.81
Prob(Q):                              0.98   Prob(JB):                         0.40
Heteroskedasticity (H):               0.51   Skew:                            -0.18
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.21
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.961931402381136
p-value: 0.03860572787149292
ADF Statistic: -2.2496766323220068
p-value: 0.18871193673201914
ADF Statistic after Differencing: -18.551387651009644
p-value after Differencing: 2.092251140994728e-30
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -351.203
Date:                Fri, 07 Mar 2025   AIC                            710.406
Time:                        18:23:44   BIC                            724.476
Sample:                             0   HQIC                           716.069
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8376      0.090      9.351      0.000       0.662       1.013
ma.L1         -1.0342      0.103    -10.040      0.000      -1.236      -0.832
ma.L2          0.0923      0.064      1.442      0.149      -0.033       0.218
sigma2         0.9821      0.043     22.628      0.000       0.897       1.067
===================================================================================
Ljung-Box (L1) (Q):                   0.05   Jarque-Bera (JB):              1075.00
Prob(Q):                              0.82   Prob(JB):                         0.00
Heteroskedasticity (H):               0.29   Skew:                            -1.29
Prob(H) (two-sided):                  0.00   Kurtosis:                        12.85
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
No description has been provided for this image
ADF Statistic: -1.5745421006952112
p-value: 0.4963679655554053
ADF Statistic after Differencing: -6.294550627148641
p-value after Differencing: 3.5319865896368846e-08
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -324.763
Date:                Fri, 07 Mar 2025   AIC                            657.525
Time:                        18:23:45   BIC                            671.595
Sample:                             0   HQIC                           663.188
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7320      0.176      4.151      0.000       0.386       1.078
ma.L1         -0.8161      0.185     -4.412      0.000      -1.179      -0.454
ma.L2         -0.0240      0.077     -0.311      0.756      -0.175       0.127
sigma2         0.7948      0.060     13.184      0.000       0.677       0.913
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                34.78
Prob(Q):                              0.96   Prob(JB):                         0.00
Heteroskedasticity (H):               0.60   Skew:                            -0.40
Prob(H) (two-sided):                  0.02   Kurtosis:                         4.64
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
No description has been provided for this image
ADF Statistic: -1.6674665747390252
p-value: 0.44794778085361125
ADF Statistic after Differencing: -4.671961998026451
p-value after Differencing: 9.497316833766148e-05
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood               -1034.789
Date:                Fri, 07 Mar 2025   AIC                           2077.579
Time:                        18:23:46   BIC                           2091.648
Sample:                             0   HQIC                          2083.242
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0188      0.708     -0.027      0.979      -1.407       1.369
ma.L1          0.0912      0.711      0.128      0.898      -1.302       1.484
ma.L2         -0.0891      0.086     -1.041      0.298      -0.257       0.079
sigma2       238.3134     18.904     12.607      0.000     201.262     275.365
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 3.81
Prob(Q):                              0.95   Prob(JB):                         0.15
Heteroskedasticity (H):               0.78   Skew:                             0.06
Prob(H) (two-sided):                  0.27   Kurtosis:                         3.59
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
ADF Statistic: -2.1315838827076186
p-value: 0.2320056291934623
ADF Statistic after Differencing: -6.2095890644999026
p-value after Differencing: 5.549472251241006e-08
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                 382.552
Date:                Fri, 07 Mar 2025   AIC                           -757.103
Time:                        18:23:48   BIC                           -743.033
Sample:                             0   HQIC                          -751.440
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.8704      0.025    -35.316      0.000      -0.919      -0.822
ma.L1          0.5165      0.057      9.022      0.000       0.404       0.629
ma.L2         -0.4692      0.031    -15.110      0.000      -0.530      -0.408
sigma2         0.0027      0.000     19.474      0.000       0.002       0.003
===================================================================================
Ljung-Box (L1) (Q):                   0.10   Jarque-Bera (JB):              3030.84
Prob(Q):                              0.75   Prob(JB):                         0.00
Heteroskedasticity (H):               0.03   Skew:                            -1.46
Prob(H) (two-sided):                  0.00   Kurtosis:                        19.84
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
No description has been provided for this image
ADF Statistic: -1.3714694524933961
p-value: 0.5958750433232802
ADF Statistic after Differencing: -12.217911691427327
p-value after Differencing: 1.1245245775203403e-22
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                 300.303
Date:                Fri, 07 Mar 2025   AIC                           -592.606
Time:                        18:23:49   BIC                           -578.536
Sample:                             0   HQIC                          -586.942
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0882      0.564      0.156      0.876      -1.017       1.193
ma.L1         -0.0443      0.563     -0.079      0.937      -1.147       1.059
ma.L2         -0.1112      0.066     -1.678      0.093      -0.241       0.019
sigma2         0.0052      0.000     13.452      0.000       0.004       0.006
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                22.28
Prob(Q):                              1.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.77   Skew:                            -0.49
Prob(H) (two-sided):                  0.23   Kurtosis:                         4.09
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
No description has been provided for this image
ADF Statistic: -2.1842216820409472
p-value: 0.2120218324904929
ADF Statistic after Differencing: -11.058379271011715
p-value after Differencing: 4.879328137171754e-20
No description has been provided for this image
No description has been provided for this image
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  250
Model:                 ARIMA(1, 1, 2)   Log Likelihood                -358.166
Date:                Fri, 07 Mar 2025   AIC                            724.331
Time:                        18:23:50   BIC                            738.401
Sample:                             0   HQIC                           729.995
                                - 250                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.8895      0.083    -10.746      0.000      -1.052      -0.727
ma.L1          0.7443      0.105      7.075      0.000       0.538       0.951
ma.L2         -0.0087      0.064     -0.137      0.891      -0.134       0.117
sigma2         1.0389      0.068     15.372      0.000       0.906       1.171
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                41.40
Prob(Q):                              0.97   Prob(JB):                         0.00
Heteroskedasticity (H):               0.58   Skew:                             0.25
Prob(H) (two-sided):                  0.01   Kurtosis:                         4.93
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
No description has been provided for this image
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer # Import SimpleImputer

# Load the dataset
for file in files:
  data = dataframes[file]
  X = data[['Open','High','Low','Volume','Log Return','Simple Return','Volatility']]  # Replace
  y = data['Close']  # Target variable
  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  # Create an imputer to fill NaN values with the mean
  imputer = SimpleImputer(strategy='mean')

  # Fit the imputer on the training data and transform both training and testing data
  X_train = imputer.fit_transform(X_train)
  X_test = imputer.transform(X_test)

  # Create a linear regression model
  model = LinearRegression()

  # Fit the model to the training data
  model.fit(X_train, y_train)

  # Make predictions on the test set
  y_pred = model.predict(X_test)

  # Evaluate the model
  mse = mean_squared_error(y_test, y_pred)
  r2 = r2_score(y_test, y_pred)

  print(f'Mean Squared Error: {mse}')
  print(f'R-squared: {r2}')

  # Plotting the results
  plt.figure(figsize=(10, 6))
  plt.scatter(y_test, y_pred)
  plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=2)  # Diagonal line
  plt.xlabel('Actual Close Prices')
  plt.ylabel('Predicted Close Prices')
  plt.title('Actual vs Predicted Close Prices')
  plt.show()
Mean Squared Error: 0.1813687566677913
R-squared: 0.9961555540816833
No description has been provided for this image
Mean Squared Error: 0.11533009611589407
R-squared: 0.9963099463726139
No description has been provided for this image
Mean Squared Error: 0.0017224296658761961
R-squared: 0.9883147020108428
No description has been provided for this image
Mean Squared Error: 0.0002008577228944818
R-squared: 0.9957808710203938
No description has been provided for this image
Mean Squared Error: 0.00018706988982735527
R-squared: 0.9984121086165332
No description has been provided for this image
Mean Squared Error: 0.1425501471625193
R-squared: 0.9916387603632241
No description has been provided for this image
Mean Squared Error: 2.9693765885142356
R-squared: 0.9962178416914171
No description has been provided for this image
Mean Squared Error: 0.5487861761102647
R-squared: 0.9931862472403402
No description has been provided for this image
Mean Squared Error: 0.23021411432534136
R-squared: 0.9888584299259457
No description has been provided for this image
Mean Squared Error: 0.4346677578903055
R-squared: 0.9937763746499695
No description has been provided for this image
Mean Squared Error: 0.022405156468870823
R-squared: 0.9934826699000399
No description has been provided for this image
Mean Squared Error: 0.05799087080325329
R-squared: 0.9954513356239653
No description has been provided for this image
Mean Squared Error: 0.048691398172960865
R-squared: 0.9962854214055153
No description has been provided for this image
Mean Squared Error: 26.199643107684075
R-squared: 0.9910209166852437
No description has been provided for this image
Mean Squared Error: 0.00023841164018041864
R-squared: 0.9974033065559662
No description has been provided for this image
Mean Squared Error: 0.00041059050378594803
R-squared: 0.9978837480393812
No description has been provided for this image
Mean Squared Error: 0.11066308656023366
R-squared: 0.9859075069976125
No description has been provided for this image
In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset
for file in files:
    data = dataframes[file]  # Replace with your file path

    # Prepare the data
    # Select features and target variable
    # Ensure features are numeric and drop rows with non-numeric values
    X = data[['Open', 'High', 'Low', 'Volume', 'Simple Return', 'Log Return', 'Volatility']].apply(pd.to_numeric, errors='coerce').dropna()
    y = data['Close'].apply(pd.to_numeric, errors='coerce').dropna()  # Ensure target is numeric

    # Reindex y to align with X after dropping rows
    y = y.reindex(X.index)

    # Check if there is enough data after dropping rows
    if len(X) < 2 or len(y) < 2:
        print(f"Skipping {file} due to insufficient data after cleaning.")
        continue  # Skip to the next file

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create a Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)

    # Fit the model to the training data
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'Results for {file}:')
    print(f'Mean Squared Error: {mse}')
    print(f'R-squared: {r2}')

    # Plotting the results
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, y_pred)
    plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=2)  # Diagonal line
    plt.xlabel('Actual Close Prices')
    plt.ylabel('Predicted Close Prices')
    plt.title(f'Actual vs Predicted Close Prices using Random Forest for {file}')
    plt.show()
Results for Crude_Oil_2023.csv:
Mean Squared Error: 0.5802596279319124
R-squared: 0.984265298729512
No description has been provided for this image
Results for Brent_Oil_2023.csv:
Mean Squared Error: 0.5201383695127169
R-squared: 0.9830945628333059
No description has been provided for this image
Results for Natural_Gas_2023.csv:
Mean Squared Error: 0.0016168512747690238
R-squared: 0.9765213310751648
No description has been provided for this image
Results for RBOB_Gasoline_2023.csv:
Mean Squared Error: 0.0005004918138169276
R-squared: 0.9905118249777439
No description has been provided for this image
Results for Heating_Oil_2023.csv:
Mean Squared Error: 0.0011989067820372468
R-squared: 0.986460754483685
No description has been provided for this image
Results for Energy_SPDR_ETF_2023.csv:
Mean Squared Error: 0.2751918315766939
R-squared: 0.983213325221418
No description has been provided for this image
Results for VanEck_Oil_ETF_2023.csv:
Mean Squared Error: 9.851069227803611
R-squared: 0.987462201637468
No description has been provided for this image
Results for SPDR_S&P_Exploration_ETF_2023.csv:
Mean Squared Error: 1.323724018462013
R-squared: 0.9857080884015251
No description has been provided for this image
Results for ExxonMobil_2023.csv:
Mean Squared Error: 0.3937363769141254
R-squared: 0.9808345374442137
No description has been provided for this image
Results for Chevron_2023.csv:
Mean Squared Error: 0.6016592819174916
R-squared: 0.986821507014054
No description has been provided for this image
Results for BPplc_2023.csv:
Mean Squared Error: 0.02933753998424803
R-squared: 0.9883533265196377
No description has been provided for this image
Results for Shellplc_2023.csv:
Mean Squared Error: 0.09466437936425558
R-squared: 0.9894474793594216
No description has been provided for this image
Results for TotalEnergies_2023.csv:
Mean Squared Error: 0.11317455262623008
R-squared: 0.9915673378027832
No description has been provided for this image
Results for Gold_2023.csv:
Mean Squared Error: 33.71895283403927
R-squared: 0.9899899708860083
No description has been provided for this image
Results for US_13week_Treasury_2023.csv:
Mean Squared Error: 0.0004889629950096581
R-squared: 0.9901711821336804
No description has been provided for this image
Results for US_10year_Treasury_2023.csv:
Mean Squared Error: 0.001340729666759866
R-squared: 0.9912895687142032
No description has been provided for this image
Results for Volatility_Index_2023.csv:
Mean Squared Error: 0.15357745575949064
R-squared: 0.9838224315727264
No description has been provided for this image
In [ ]:
#Monte Carlo Simuation
import pandas as pd

for file in files:
  df = dataframes[file]
  mean_return = df['Simple Return'].mean()
  std_dev_return = df['Simple Return'].std()
  num_simulations = 10000
  num_days = 252  # Number of trading days to simulate
  last_price = df['Close'].iloc[-1]
  print(f"Mean Return: {mean_return}")
  print(f"Standard Deviation of Returns: {std_dev_return}")
  simulated_prices = np.zeros((num_days, num_simulations))
  for i in range(num_simulations):
    random_returns = np.random.normal(mean_return, std_dev_return, num_days)
    price_path = last_price * (1 + random_returns).cumprod()
    simulated_prices[:, i] = price_path
    plt.figure(figsize=(12, 6))
    plt.plot(simulated_prices, color='blue', alpha=0.1)
    plt.title('Monte Carlo Simulation of Price Paths')
    plt.xlabel('Days')
    plt.ylabel('Price')
    plt.show()
    final_prices = simulated_prices[-1, :]
Mean Return: -6.0272543964092305e-05
Standard Deviation of Returns: 0.021231645734711615
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-23-72b84e02d85d> in <cell line: 0>()
     17     simulated_prices[:, i] = price_path
     18     plt.figure(figsize=(12, 6))
---> 19     plt.plot(simulated_prices, color='blue', alpha=0.1)
     20     plt.title('Monte Carlo Simulation of Price Paths')
     21     plt.xlabel('Days')

/usr/local/lib/python3.11/dist-packages/matplotlib/pyplot.py in plot(scalex, scaley, data, *args, **kwargs)
   3827     **kwargs,
   3828 ) -> list[Line2D]:
-> 3829     return gca().plot(
   3830         *args,
   3831         scalex=scalex,

/usr/local/lib/python3.11/dist-packages/matplotlib/axes/_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
   1777         lines = [*self._get_lines(self, *args, data=data, **kwargs)]
   1778         for line in lines:
-> 1779             self.add_line(line)
   1780         if scalex:
   1781             self._request_autoscale_view("x")

/usr/local/lib/python3.11/dist-packages/matplotlib/axes/_base.py in add_line(self, line)
   2367         self._set_artist_props(line)
   2368         if line.get_clip_path() is None:
-> 2369             line.set_clip_path(self.patch)
   2370 
   2371         self._update_line_limits(line)

/usr/local/lib/python3.11/dist-packages/matplotlib/artist.py in set_clip_path(self, path, transform)
    813             if isinstance(path, Rectangle):
    814                 self.clipbox = TransformedBbox(Bbox.unit(),
--> 815                                                path.get_transform())
    816                 self._clippath = None
    817                 success = True

/usr/local/lib/python3.11/dist-packages/matplotlib/patches.py in get_transform(self)
    307     def get_transform(self):
    308         """Return the `~.transforms.Transform` applied to the `Patch`."""
--> 309         return self.get_patch_transform() + artist.Artist.get_transform(self)
    310 
    311     def get_data_transform(self):

/usr/local/lib/python3.11/dist-packages/matplotlib/patches.py in get_patch_transform(self)
    813         return transforms.BboxTransformTo(bbox) \
    814                 + transforms.Affine2D() \
--> 815                 .translate(-rotation_point[0], -rotation_point[1]) \
    816                 .scale(1, self._aspect_ratio_correction) \
    817                 .rotate_deg(self.angle) \

/usr/local/lib/python3.11/dist-packages/matplotlib/transforms.py in translate(self, tx, ty)
   2035         self._mtx[0, 2] += tx
   2036         self._mtx[1, 2] += ty
-> 2037         self.invalidate()
   2038         return self
   2039 

/usr/local/lib/python3.11/dist-packages/matplotlib/transforms.py in invalidate(self)
    158         ancestors.  Should be called any time the transform changes.
    159         """
--> 160         return self._invalidate_internal(
    161             level=self._INVALID_AFFINE_ONLY if self.is_affine else self._INVALID_FULL,
    162             invalidating_node=self)

/usr/local/lib/python3.11/dist-packages/matplotlib/transforms.py in _invalidate_internal(self, level, invalidating_node)
    172             return
    173         self._invalid = level
--> 174         for parent in list(self._parents.values()):
    175             parent = parent()  # Dereference the weak reference.
    176             if parent is not None:

KeyboardInterrupt: 
No description has been provided for this image
In [ ]:
final_prices = simulated_prices[-1, :]
plt.figure(figsize=(12, 6))
plt.hist(final_prices, bins=50, alpha=0.7)
plt.title('Distribution of Final Prices After Simulation')
plt.xlabel('Final Price')
plt.ylabel('Frequency')
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]: